### Make tables of descriptive statistics

## Emit the start time and session details at top level (for the run log).
date()
library("tidyverse")
sessionInfo()


## Load the analysis data. The code below reads `data_dispute` and
## `data_participant`; presumably these two files restore them -- confirm.
load("kr_analysis_dispute.rda")
load("kr_analysis_participant.rda")


## ----------------------------------------------------------------------
## Percentage of each covariate missing
## ----------------------------------------------------------------------

## Participant-level variables
##
## For each (id, ccode) unit, a covariate is flagged as missing when it takes
## more than one distinct value across the data sets stacked from
## `data_participant` (presumably multiply-imputed copies, in which only the
## imputed cells vary -- confirm against the script that builds them).  The
## result has one row per covariate: its display label and the percentage of
## units flagged, formatted to one decimal place.
mis_participant <- bind_rows(data_participant) %>%
  select(id, ccode, gdp_pwt, irst, pec, tpop, upop, distance, nuclear, pct_imports, polity2) %>%
  group_by(id, ccode) %>%
  summarise_all(~ length(unique(.))) %>%
  mutate_all(~ . > 1) %>%
  ungroup() %>%
  select(-id, -ccode) %>%
  gather(key = "variable", value = "missing") %>%
  group_by(variable) %>%
  summarise(pct_missing = 100 * mean(missing)) %>%
  ## Sort while the percentage is still numeric.  Sorting the formatted
  ## strings would be lexicographic and put e.g. "9.5" ahead of "10.2".
  arrange(desc(pct_missing)) %>%
  mutate(pct_missing = sprintf("%.1f", pct_missing)) %>%
  mutate(variable = recode(variable,
                           pct_imports = "Import Percentage",
                           gdp_pwt = "GDP",
                           polity2 = "Democracy",
                           upop = "Urban Population",
                           pec = "Energy Consumption",
                           distance = "Distance to Dispute",
                           irst = "Iron and Steel Production",
                           nuclear = "Nuclear Weapons",
                           tpop = "Total Population"))

## Dispute-level variables
##
## Same procedure as for the participant-level table, with one unit per
## dispute `id`: a covariate is flagged as missing when it takes more than
## one distinct value across the data sets stacked from `data_dispute`.
mis_dispute <- bind_rows(data_dispute) %>%
  select(id, polity_a, polity_b, majpow_a, majpow_b, py_alt, s_cinc, contig) %>%
  group_by(id) %>%
  summarise_all(~ length(unique(.))) %>%
  mutate_all(~ . > 1) %>%
  ungroup() %>%
  select(-id) %>%
  gather(key = "variable", value = "missing") %>%
  group_by(variable) %>%
  summarise(pct_missing = 100 * mean(missing)) %>%
  ## Sort while the percentage is still numeric.  Sorting the formatted
  ## strings would be lexicographic and put e.g. "9.5" ahead of "10.2".
  arrange(desc(pct_missing)) %>%
  mutate(pct_missing = sprintf("%.1f", pct_missing)) %>%
  mutate(variable = recode(variable,
                           polity_a = "Democracy, Side $A$",
                           polity_b = "Democracy, Side $B$",
                           contig = "Contiguity",
                           majpow_a = "Major Power, Side $A$",
                           majpow_b = "Major Power, Side $B$",
                           py_alt = "Peace Years",
                           s_cinc = "Interest Similarity"))

## Create table
##
## One LaTeX row ("<label> & <pct> \\") per covariate.  The data frame is
## passed as the data argument of str_glue_data() rather than as `.envir` of
## str_glue(): `.envir` is documented to be an environment, and a data frame
## there is not reliably accepted across glue versions.
entr_participant <- str_glue_data(mis_participant, "{variable} & {pct_missing} \\\\")
entr_dispute <- str_glue_data(mis_dispute, "{variable} & {pct_missing} \\\\")
tab_out <- c("\\begin{tabular}{lr}",
             "\\toprule",
             "Variable & Pct.\\ Missing \\\\",
             "\\midrule",
             "\\multicolumn{2}{l}{\\it State-Level} \\\\",
             entr_participant,
             "\\midrule",
             "\\multicolumn{2}{l}{\\it Dispute-Level} \\\\",
             entr_dispute,
             "\\bottomrule",
             "\\end{tabular}")

## Make sure the output directory exists before writing the table.
if (!dir.exists("tables")) {
  dir.create("tables")
}
writeLines(tab_out, con = "tables/table_A1.tex")


## -----------------------------------------------------------------------------
## Covariate descriptive statistic table
## -----------------------------------------------------------------------------

## Display metadata for every covariate: the transformation tag, the label
## and units shown in the table, and the category used to group the rows.
meta_terms <- tribble(
  ~term, ~transform, ~name, ~units, ~category,
  "distance", "log1p", "Distance to Dispute", "Miles", "geopolitical",
  "gdp_pwt", "log", "GDP", "Millions USD, 2011", "economic",
  "irst", "log1p", "Iron and Steel Production", "Thousands of tons", "economic",
  "pec", "log1p", "Energy Consumption", "Thousands of coal-tons", "economic",
  "tpop", "log1p", "Total Population", "Thousands of persons", "demographic",
  "upop", "log1p", "Urban Population", "Thousands of persons", "demographic",
  "nuclear", "none", "Nuclear Weapons", "Binary", "geopolitical",
  "pct_imports", "log1p", "Import Percentage", "Percentage", "economic",
  "polity2", "none", "Democracy", "Polity IV score", "political",
  "polity_a", "none", " Democracy, Side $A$", "Polity IV score", "alpha",
  "polity_b", "none", " Democracy, Side $B$", "Polity IV score", "alpha",
  "majpow_a", "none", " Major Power, Side $A$", "Binary", "alpha",
  "majpow_b", "none", " Major Power, Side $B$", "Binary", "alpha",
  "n_states_a", "log", " Participants, Side $A$", "Count", "alpha",
  "n_states_b", "log", " Participants, Side $B$", "Count", "alpha",
  "py_alt", "log1p", "Peace Years", "Years", "alpha",
  "s_cinc", "none", "Interest Similarity", "S-score", "alpha",
  "contig", "none", "Contiguity", "Binary", "alpha",
) %>%
  mutate(
    ## Append a dagger to the units of "log" terms and a double dagger to
    ## those of "log1p" terms; every other term keeps its units unchanged.
    units = paste0(units,
                   recode(transform,
                          log = "$^\\dag$",
                          log1p = "$^\\ddag$",
                          .default = "")),
    ## Level order sets the order in which categories are sorted later on.
    category = factor(category,
                      levels = c("demographic", "economic", "political", "geopolitical", "alpha"))
  )

## Mean and standard deviation of each participant-level covariate, computed
## over all rows of the stacked `data_participant` data sets and converted to
## display strings.
desc_participant <- bind_rows(data_participant) %>%
  select(gdp_pwt, irst, pec, tpop, upop, distance, nuclear, pct_imports, polity2) %>%
  gather(key = "term", value = "value") %>%
  group_by(term) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  mutate_if(is.numeric, function(stat) {
    ## Both candidate formats are built for the whole column; values above
    ## 100 take the rounded, comma-separated form and the rest the other.
    as_whole <- format(round(stat), digits = NULL, big.mark = ",", scientific = FALSE, trim = TRUE)
    as_short <- format(stat, digits = 2, scientific = FALSE, trim = TRUE)
    if_else(stat > 100, as_whole, as_short)
  })

## Mean and standard deviation of each dispute-level covariate, computed over
## all rows of the stacked `data_dispute` data sets and printed with two
## decimal places.
desc_dispute <- bind_rows(data_dispute) %>%
  select(polity_a, polity_b, majpow_a, majpow_b, n_states_a, n_states_b, py_alt, s_cinc, contig) %>%
  gather(key = "term", value = "value") %>%
  group_by(term) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  mutate_if(is.numeric, function(stat) sprintf("%.2f", stat))

## Stack both sets of statistics, typeset a leading minus sign on the mean in
## math mode, attach the display metadata, and nest the sorted rows by
## category (one nested data frame per table section).
tab_desc <- desc_participant %>%
  bind_rows(desc_dispute) %>%
  mutate(mean = str_replace(mean, pattern = "^-", replacement = "$-$")) %>%
  left_join(y = meta_terms, by = "term") %>%
  arrange(category, name) %>%
  group_by(category) %>%
  nest()

## Body of the LaTeX table: for each category a \midrule, an italic heading
## spanning all four columns, and then one row per covariate.
tab_desc_body <- flatten_chr(
  map2(tab_desc$category, tab_desc$data, function(category_level, section) {
    title <- recode(category_level,
                    "economic" = "State-Level: Economic",
                    "demographic" = "State-Level: Demographic",
                    "political" = "State-Level: Political",
                    "geopolitical" = "State-Level: Geopolitical",
                    "alpha" = "Dispute-Level")
    hdr <- str_glue("\\multicolumn{{4}}{{l}}{{\\textit{{{title}}}}} \\\\")
    rows <- str_glue_data(section, "{name} & {units} & {mean} & {sd} \\\\")
    c("\\midrule", hdr, rows)
  })
)

## Wrap the body rows in a four-column tabular environment and write the
## finished table to disk.
tab_desc_full <- c(
  "\\begin{tabular}{llrr}",
  "\\toprule",
  "Variable & Units & Mean & Std.\\ Dev. \\\\",
  tab_desc_body,
  "\\bottomrule",
  "\\end{tabular}"
)

writeLines(text = tab_desc_full, con = "tables/table_A2.tex")


## Emit the finish time at top level.
date()
